library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.0 ──
## ✓ ggplot2 3.3.2     ✓ purrr   0.3.4
## ✓ tibble  3.0.4     ✓ dplyr   1.0.2
## ✓ tidyr   1.1.2     ✓ stringr 1.4.0
## ✓ readr   1.4.0     ✓ forcats 0.5.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(plotly)
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout

Setup

# Load the 23andMe SNP table from a tab-separated file. `comment = "#"` makes
# readr ignore comment text introduced by '#'. Column types are pinned
# explicitly instead of guessed, using readr's one-letter type codes.
SNPs <- read_tsv(
  file = "data/23andMe_complete.txt",
  comment = "#",
  col_types = cols(
    rsid = "c",        # character
    chromosome = "f",  # factor
    position = "i",    # integer
    genotype = "f"     # factor
  )
)
# Bar chart of how many SNPs carry each genotype.
ggplot(SNPs, aes(x = genotype)) +
  geom_bar() +
  labs(
    title = "Total SNPs for each genotype",
    x = "Genotype",
    y = "Total number of SNPs"
  )

# Write the per-chromosome bar chart (bars filled by genotype) to a
# 6 x 3 inch PDF file.
pdf("images/SNP_example_plot.pdf", width = 6, height = 3)
# print() explicitly: a ggplot object is only drawn when it is printed. At top
# level that happens implicitly, but not under source() or inside a function
# or loop, in which case the device would be closed without a plot on it.
print(
  ggplot(data = SNPs) +
    geom_bar(mapping = aes(x = chromosome, fill = genotype))
)
dev.off()
## quartz_off_screen 
##                 2
# Plot the per-chromosome bar chart (bars filled by genotype) to a PNG file.
# Width and height are given in pixels as 6 * ppi, with res = ppi.
ppi <- 300
png("images/SNP_example_plot.png", width = 6 * ppi, height = 6 * ppi, res = ppi)
# print() explicitly: a ggplot object is only drawn when it is printed. At top
# level that happens implicitly, but not under source() or inside a function
# or loop, in which case the device would be closed without a plot on it.
print(
  ggplot(data = SNPs) +
    geom_bar(mapping = aes(x = chromosome, fill = genotype))
)
dev.off()
## quartz_off_screen 
##                 2

Genotype counts per chromosome

# Version 1: build the scatter plot first, store it in `p`, then hand it to
# ggplotly() to get the interactive plotly version.
p <- ggplot(iris, aes(Sepal.Length, Sepal.Width, color = Species)) +
  geom_point()
ggplotly(p)
# Version 2: same plot, converted in a single expression without an
# intermediate variable.
(
  ggplot(iris, aes(Sepal.Length, Sepal.Width, color = Species)) +
    geom_point()
) %>%
  ggplotly()

Exercises 1

# SNP count per chromosome, drawn as yellow bars with a black outline.
ggplot(data = SNPs, aes(x = chromosome)) +
  geom_bar(fill = "yellow", color = "black") +
  labs(
    # Title typo fixed: "foreach" -> "for each".
    title = "Total SNP Count for each Chromosome",
    x = "Chromosome",
    y = "Count"
  )

Exercises 2

# SNP count per chromosome with each bar split (stacked) by genotype; the
# legend is placed below the plot.
ggplot(data = SNPs, aes(x = chromosome, fill = genotype)) +
  geom_bar(color = "black") +
  labs(
    # Title typo fixed: "foreach" -> "for each".
    title = "Total SNP Count for each Chromosome",
    x = "Chromosome",
    y = "Count"
  ) +
  theme(legend.position = "bottom")

Exercises 3

# Interactive (plotly) version of the genotype-filled bar chart. The title is
# wrapped with str_wrap() at a width of 25.
(
  ggplot(SNPs, aes(x = chromosome, fill = genotype)) +
    geom_bar(color = "black") +
    labs(
      title = str_wrap("Total number of SNPs on each chromosome", width = 25),
      x = "Chromosome",
      y = "Count"
    )
) %>%
  ggplotly()

Exercises 4

# SNP count per chromosome, one facet panel per genotype. Text sizes are
# reduced (x-axis tick labels in particular) and the legend sits at the bottom.
ggplot(data = SNPs, aes(x = chromosome, fill = genotype)) +
  geom_bar(color = "black") +
  labs(
    # Title typo fixed: "foreach" -> "for each".
    title = "Total SNP Count for each Chromosome",
    x = "Chromosome",
    y = "Count"
  ) +
  facet_wrap(facets = vars(genotype)) +
  # Single theme() call instead of two chained ones; same settings.
  theme(
    legend.position = "bottom",
    text = element_text(size = 10),
    axis.text.x = element_text(size = 4)
  )

Exercises 5

# SNP count per chromosome, one facet panel per genotype. scales = "free_y"
# lets each panel use its own y-axis range. Text sizes are reduced and the
# legend sits at the bottom.
ggplot(data = SNPs, aes(x = chromosome, fill = genotype)) +
  geom_bar(color = "black") +
  labs(
    # Title typo fixed: "foreach" -> "for each".
    title = "Total SNP Count for each Chromosome",
    x = "Chromosome",
    y = "Count"
  ) +
  facet_wrap(facets = vars(genotype), scales = "free_y") +
  # Single theme() call instead of two chained ones; same settings.
  theme(
    text = element_text(size = 10),
    axis.text.x = element_text(size = 4),
    legend.position = "bottom"
  )

Exercises 6

# SNP count per chromosome with bars stacked by genotype and the legend
# placed below the plot.
ggplot(data = SNPs, aes(x = chromosome, fill = genotype)) +
  geom_bar(color = "black") +
  labs(
    # Title typo fixed: "foreach" -> "for each".
    title = "Total SNP Count for each Chromosome",
    x = "Chromosome",
    y = "Count"
  ) +
  theme(legend.position = "bottom")

# Plot the stacked per-chromosome bar chart to a PNG file.
# Width and height are given in pixels as 15 * ppi and 10 * ppi, with res = ppi.
ppi <- 300
png("images/SNP.png", width = 15 * ppi, height = 10 * ppi, res = ppi)
# print() explicitly: a ggplot object is only drawn when it is printed. At top
# level that happens implicitly, but not under source() or inside a function
# or loop, in which case the device would be closed without a plot on it.
print(
  ggplot(data = SNPs) +
    geom_bar(mapping = aes(x = chromosome, fill = genotype)) +
    # Title typo fixed: "foreach" -> "for each".
    ggtitle("Total SNP Count for each Chromosome Stacked")
)
dev.off()
## quartz_off_screen 
##                 2

Exercises 7

Stacked genotype counts per chromosome